This vignette shows how to use Signac with Seurat. There are three parts: Seurat, Signac and then visualization. We use an example CITE-seq data set from 10X Genomics.
Start with the standard pre-processing steps for a Seurat object.
library(Seurat)
Download data from 10X Genomics.
download.file("https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_protein_v3/pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5", destfile = "pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5")
Create a Seurat object, and then perform SCTransform normalization. Note:
# load dataset
E = Read10X_h5(filename = "pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5")
pbmc <- CreateSeuratObject(counts = E$`Gene Expression`, project = "pbmc")
# store mitochondrial percentage in object meta data
pbmc <- PercentageFeatureSet(pbmc, pattern = "^MT-", col.name = "percent.mt")
# run sctransform
pbmc <- SCTransform(pbmc,vars.to.regress = "percent.mt", verbose = FALSE)
Perform dimensionality reduction by PCA and UMAP embedding. Note: Signac actually needs these commands to be run, since it uses the nearest neighbor graph.
# These are now standard steps in the Seurat workflow for visualization and clustering
pbmc <- RunPCA(pbmc, verbose = FALSE)
pbmc <- RunUMAP(pbmc, dims = 1:30, verbose = FALSE)
pbmc <- FindNeighbors(pbmc, dims = 1:30, verbose = FALSE)
DimPlot(pbmc, label = TRUE) + NoLegend()
Now, make sure you have the Signac package installed.
devtools::install_github("mathewchamberlain/Signac")
library(Signac)
Generate Signac labels for the Seurat object. Note: * This is the main function for Signac. * Optionally, you can do parallel computing by setting num.cores > 1 in the Signac function. * Run time is ~minutes for ~10,000 cells.
# Run Signac
data("training_HPCA")
labels <- Signac(pbmc, R = training_HPCA) # optionally do parallel computing by setting num.cores > 1
celltypes = Generate_lbls(labels, E = pbmc)
Now we can visualize the cell type classifications at many different levels: Immune and nonimmune
pbmc <- Seurat::AddMetaData(pbmc, metadata=celltypes$Immune, col.name = "immmune")
pbmc <- Seurat::SetIdent(pbmc, value='immmune')
DimPlot(pbmc)
pbmc <- Seurat::AddMetaData(pbmc, metadata=celltypes$L2, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)
lbls = factor(celltypes$CellTypes)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)
lbls = factor(celltypes$CellStates)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)
lbls = factor(celltypes$CellStates_novel)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)
Add protein expression information
pbmc[["ADT"]] <- CreateAssayObject(counts = E$`Antibody Capture`[,colnames(E$`Antibody Capture`) %in% colnames(pbmc)])
## Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
pbmc <- NormalizeData(pbmc, assay = "ADT", normalization.method = "CLR")
pbmc <- ScaleData(pbmc, assay = "ADT")
Visualize protein information on the RNA clustering result
FeaturePlot(pbmc, features = c("CD8a-TotalSeqB", "CD8A"), ncol = 2)
## Warning: Could not find CD8a-TotalSeqB in the default search locations, found in ADT assay
## instead
RidgePlot(pbmc, features = c("CD8a-TotalSeqB"))
## Warning: Could not find CD8a-TotalSeqB in the default search locations, found in ADT assay
## instead
Identify differentially expressed proteins between clusters
# Downsample the clusters to a maximum of 500 cells each (makes the heatmap easier to see for
# small clusters)
pbmc.small <- subset(pbmc, downsample = 500)
# Find protein markers for all clusters, and draw a heatmap
adt.markers <- FindAllMarkers(pbmc.small, assay = "ADT", only.pos = TRUE, verbose = F)
DoHeatmap(pbmc.small, features = unique(adt.markers$gene), assay = "ADT", angle = 90) + NoLegend()
DefaultAssay(pbmc) <- "ADT"
Cluster the data based on protein expression information.
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst")
pbmc <- ScaleData(pbmc)
pbmc <- RunPCA(pbmc, verbose = F)
## Warning in irlba(A = t(x = object), nv = npcs, ...): You're computing too large a
## percentage of total singular values, use a standard svd instead.
## Warning in irlba(A = t(x = object), nv = npcs, ...): did not converge--results might be
## invalid!; try increasing work or maxit
pbmc <- FindNeighbors(pbmc)
pbmc <- FindClusters(pbmc, resolution = 0.8)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 7865
## Number of edges: 260140
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8849
## Number of communities: 15
## Elapsed time: 0 seconds
pbmc <- RunUMAP(pbmc, dims = 1:10)
Visualize Signac cell types on protein clusters
lbls = factor(celltypes$CellTypes)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)
lbls = factor(celltypes$CellStates)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)
FeaturePlot(pbmc, features = c("CD8a-TotalSeqB", "CD8A"))
## Warning: Found the following features in more than one assay, excluding the default. We
## will not include these in the final dataframe: CD8A
## Warning in FetchData(object = object, vars = c(dims, "ident", features), : The following
## requested variables were not found: CD8A
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS: /usr/lib64/libblas.so.3.4.2
## LAPACK: /site/ne/app/x86_64/R/v3.5.0/lib64/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8
## [4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] bzinb_1.0.4 Signac_2.0.7 pscl_1.5.5 MASS_7.3-51.1
## [5] forcats_0.5.0 stringr_1.4.0 dplyr_0.8.5 purrr_0.3.3
## [9] readr_1.3.1 tidyr_1.1.1 tibble_3.0.3 tidyverse_1.3.0
## [13] sctransform_0.2.1 Seurat_3.2.0 ggplot2_3.3.2 veni_1.0.3
## [17] Matrix.utils_0.9.8 Matrix_1.2-18 biomaRt_2.38.0
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.1 bit64_0.9-7 knitr_1.28
## [4] irlba_2.3.3 data.table_1.13.0 rpart_4.1-15
## [7] RCurl_1.98-1.1 generics_0.0.2 BiocGenerics_0.28.0
## [10] cowplot_1.0.0 RSQLite_2.2.0 RANN_2.6.1
## [13] europepmc_0.4 future_1.16.0 bit_1.1-15.2
## [16] enrichplot_1.2.0 spatstat.data_1.4-3 xml2_1.3.1
## [19] lubridate_1.7.8 httpuv_1.5.2 assertthat_0.2.1
## [22] viridis_0.5.1 xfun_0.12 hms_0.5.3
## [25] evaluate_0.14 promises_1.1.0 fansi_0.4.1
## [28] progress_1.2.2 caTools_1.17.1.1 dbplyr_1.4.4
## [31] readxl_1.3.1 igraph_1.2.5 DBI_1.1.0
## [34] htmlwidgets_1.5.1 stats4_3.5.0 ellipsis_0.3.0
## [37] RSpectra_0.16-0 backports_1.1.6 deldir_0.1-28
## [40] vctrs_0.3.2 Biobase_2.42.0 ROCR_1.0-7
## [43] abind_1.4-5 withr_2.1.2 ggforce_0.3.2
## [46] packrat_0.5.0 grr_0.9.5 triebeard_0.3.0
## [49] checkmate_2.0.0 aplpack_1.3.3 prettyunits_1.1.1
## [52] goftest_1.2-2 cluster_2.1.0 DOSE_3.8.2
## [55] ape_5.3 lazyeval_0.2.2 crayon_1.3.4
## [58] hdf5r_1.0.1 pkgconfig_2.0.3 labeling_0.3
## [61] tweenr_1.0.1 nlme_3.1-145 rlang_0.4.8
## [64] globals_0.12.5 RJSONIO_1.3-1.4 lifecycle_0.2.0
## [67] miniUI_0.1.1.1 neuralnet_1.44.2 modelr_0.1.8
## [70] rsvd_1.0.3 cellranger_1.1.0 tcltk_3.5.0
## [73] randomForest_4.6-14 polyclip_1.10-0 lmtest_0.9-37
## [76] graph_1.60.0 urltools_1.7.3 Rhdf5lib_1.4.3
## [79] boot_1.3-24 zoo_1.8-4 reprex_0.3.0
## [82] ggridges_0.5.2 png_0.1-7 viridisLite_0.3.0
## [85] bitops_1.0-6 KernSmooth_2.23-16 blob_1.2.1
## [88] qvalue_2.14.1 gridGraphics_0.5-0 S4Vectors_0.20.1
## [91] reactome.db_1.66.0 scales_1.1.0 memoise_1.1.0
## [94] graphite_1.28.2 magrittr_1.5 plyr_1.8.6
## [97] ica_1.0-2 gplots_3.0.3 gdata_2.18.0
## [100] compiler_3.5.0 lsei_1.2-0 RColorBrewer_1.1-2
## [103] lme4_1.1-23 fitdistrplus_1.0-14 cli_2.0.2
## [106] listenv_0.8.0 patchwork_1.0.1 pbapply_1.4-2
## [109] mgcv_1.8-31 tidyselect_1.1.0 stringi_1.4.6
## [112] yaml_2.2.1 GOSemSim_2.8.0 ggrepel_0.8.2
## [115] grid_3.5.0 fastmatch_1.1-0 tools_3.5.0
## [118] future.apply_1.4.0 parallel_3.5.0 rstudioapi_0.11
## [121] gridExtra_2.3 farver_2.0.3 Rtsne_0.15
## [124] ggraph_2.0.3 digest_0.6.18 rvcheck_0.1.8
## [127] BiocManager_1.30.10 shiny_1.4.0.2 Rcpp_1.0.4.6
## [130] broom_0.7.0 later_1.0.0 RcppAnnoy_0.0.16
## [133] httr_1.4.1 AnnotationDbi_1.44.0 npsurv_0.4-0
## [136] colorspace_1.4-0 rvest_0.3.5 XML_3.99-0.3
## [139] fs_1.4.1 tensor_1.5 reticulate_1.16-9001
## [142] IRanges_2.16.0 splines_3.5.0 uwot_0.1.8
## [145] statmod_1.4.34 spatstat.utils_1.17-0 graphlayouts_0.6.0
## [148] ggplotify_0.0.5 plotly_4.9.2.1 xtable_1.8-4
## [151] jsonlite_1.6.1 nloptr_1.2.2.1 spatstat_1.64-1
## [154] tidygraph_1.2.0 UpSetR_1.4.0 glasso_1.11
## [157] R6_2.4.1 pillar_1.4.3 htmltools_0.4.0
## [160] mime_0.9 glue_1.4.0 fastmap_1.0.1
## [163] minqa_1.2.4 BiocParallel_1.16.6 class_7.3-16
## [166] codetools_0.2-16 fgsea_1.8.0 lattice_0.20-41
## [169] leiden_0.3.3 gtools_3.8.1 ReactomePA_1.26.0
## [172] GO.db_3.7.0 limma_3.38.3 survival_3.2-3
## [175] rmarkdown_2.1 munsell_0.5.0 e1071_1.7-3
## [178] DO.db_2.9 rhdf5_2.27.13 haven_2.3.1
## [181] reshape2_1.4.4 gtable_0.3.0
A work by Mathew Chamberlain
mathew.chamberlain@sanofi.com